# Source pages on the New York State Department of Health site.
# `url` is read into the induced-abortion table and `url1` into the
# live-birth table.
# NOTE(review): the two pages come from different report years (2019 vs 2014);
# confirm that mixing years is intended before interpreting the rates.
url <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table21.htm"
url1 <- "https://www.health.ny.gov/statistics/vital_statistics/2014/table07.htm"
# Scrape the induced-abortion page: parse the HTML at `url`, convert its
# tables to data frames, and keep the first one. With header = FALSE the
# first table row is not used for column names, so the columns get generic
# names that clean_names() then normalises.
induced_abortion_age <-
  read_html(url) %>%
  html_table(header = FALSE) %>%
  first() %>%
  janitor::clean_names()
# Scrape the live-birth page: parse the HTML at `url1`, convert its tables to
# data frames, and keep the first one. With header = FALSE the first table
# row is not used for column names, so the columns get generic names that
# clean_names() then normalises.
live_birth_age <-
  read_html(url1) %>%
  html_table(header = FALSE) %>%
  first() %>%
  janitor::clean_names()
Data cleaning: live birth data.
Data cleaning: induced abortion data.
# Clean the scraped induced-abortion table into one row per county/borough
# with a numeric total and numeric counts for each age group.
#   * select(1:8) keeps the first eight columns, which are then given
#     readable names.
#   * slice(14:70) keeps rows 14-70 of the scraped table.
#     NOTE(review): the row range is hard-coded; presumably it skips header
#     and summary rows -- re-check it if the page layout changes.
#   * Kings, New York and Richmond are renamed to Brooklyn, Manhattan and
#     Staten_Island. "New York" is compared for exact equality, so longer
#     labels that merely contain "New York" are left untouched.
#   * Thousands separators are removed from every count column before the
#     conversion to numeric. A single function is applied to all count
#     columns so that each column is always cleaned from its own values.
clean_ia_age <-
  induced_abortion_age %>%
  select(1:8) %>%
  purrr::set_names(
    c(
      "borough", "total", "age_less_20", "age_20_24", "age_25_29",
      "age_30_34", "age_35_39", "age_plus_40"
    )
  ) %>%
  slice(14:70) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    borough = ifelse(as.character(borough) == "New York", "Manhattan", as.character(borough)),
    borough = str_replace(borough, "Richmond", "Staten_Island")
  ) %>%
  mutate_at(
    c(
      "total", "age_less_20", "age_20_24", "age_25_29",
      "age_30_34", "age_35_39", "age_plus_40"
    ),
    # str_replace_all drops every comma, not only the first one.
    function(count) as.numeric(str_replace_all(count, ",", ""))
  ) %>%
  janitor::clean_names()
Regions are defined according to this website: https://statejobs.ny.gov/assets/help/regionMapText.cfm
Grouping the counties in the live births data into regions.
# Collapse the county-level live-birth counts into regions.
# Each gsub() step replaces any of the listed county names found in `borough`
# with the region label; the steps run in order, and names that match no
# pattern are left as they are. Rows are then grouped by the resulting label
# and the counts are summed per group.
# `total_rate` holds the summed `total` counts (not yet a rate).
# NOTE(review): `clean_lb_age` is defined outside the code shown here; this
# assumes it has a `borough` column plus numeric `total` and age-group
# columns -- confirm against its definition.
rename_lb_age <-
  clean_lb_age %>%
  mutate(
    borough = gsub("Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington", "Saratoga", borough),
    borough = gsub("Franklin|Clinton|Essex|Hamilton", "Eastern Adirondacks", borough),
    borough = gsub("Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence", "Western Adirondacks", borough),
    borough = gsub("Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins", "Central New York", borough),
    borough = gsub("Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates", "Finger Lakes", borough),
    borough = gsub("Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming", "Western New York", borough),
    borough = gsub("Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster", "Hudson Valley", borough),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  summarize(
    total_rate = sum(total),
    age_less_20 = sum(age_less_20),
    age_20_24 = sum(age_20_24),
    age_25_29 = sum(age_25_29),
    age_30_34 = sum(age_30_34),
    age_35_39 = sum(age_35_39),
    age_plus_40 = sum(age_plus_40)
  )
Grouping the counties in the induced abortion data into regions.
# Collapse the county-level induced-abortion counts into regions.
# Each gsub() step replaces any of the listed county names found in `borough`
# with the region label; the steps run in order, and names that match no
# pattern are left as they are. Rows are then grouped by the resulting label
# and the counts are summed per group.
# `total_rate` holds the summed `total` counts (not yet a rate).
# NOTE(review): the Eastern Adirondacks pattern lists the combined labels
# "Essex/Hamilton" and "Hamilton/Essex" after the single county names;
# confirm a combined label collapses to one region name rather than two
# names joined by "/".
rename_ia_age <-
  clean_ia_age %>%
  mutate(
    borough = gsub("Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington", "Saratoga", borough),
    borough = gsub("Franklin|Clinton|Essex|Hamilton|Essex/Hamilton|Hamilton/Essex", "Eastern Adirondacks", borough),
    borough = gsub("Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence", "Western Adirondacks", borough),
    borough = gsub("Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins", "Central New York", borough),
    borough = gsub("Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates", "Finger Lakes", borough),
    borough = gsub("Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming", "Western New York", borough),
    borough = gsub("Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster", "Hudson Valley", borough),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  summarize(
    total_rate = sum(total),
    age_less_20 = sum(age_less_20),
    age_20_24 = sum(age_20_24),
    age_25_29 = sum(age_25_29),
    age_30_34 = sum(age_30_34),
    age_35_39 = sum(age_35_39),
    age_plus_40 = sum(age_plus_40)
  )
Merging the data to compute rates from the count data.
# Join the regional live-birth counts (left table, columns suffixed `_x`
# after clean_names()) with the regional induced-abortion counts (right
# table, suffixed `_y`) and turn each pair of counts into a rate:
# induced abortions per 1,000 live births.
# full_join() keeps regions that appear in only one table; their rates are NA.
merged_data <-
  full_join(rename_lb_age, rename_ia_age, by = "borough") %>%
  # Turns the join suffixes ".x"/".y" into "_x"/"_y".
  janitor::clean_names() %>%
  mutate(
    age_less_20 = (age_less_20_y / age_less_20_x) * 1000,
    age_20_24 = (age_20_24_y / age_20_24_x) * 1000,
    age_25_29 = (age_25_29_y / age_25_29_x) * 1000,
    age_30_34 = (age_30_34_y / age_30_34_x) * 1000,
    age_35_39 = (age_35_39_y / age_35_39_x) * 1000,
    age_plus_40 = (age_plus_40_y / age_plus_40_x) * 1000,
    total = (total_rate_y / total_rate_x) * 1000
  ) %>%
  select(
    borough,
    age_less_20,
    age_20_24,
    age_25_29,
    age_30_34,
    age_35_39,
    age_plus_40,
    total
  )
Plotly bar chart: induced abortion rates by age group and region.
# Bar chart of the induced-abortion rate per age group, one colour per
# region. The overall `total` rate is dropped, the age-group columns are
# reshaped to long form (one row per region and age group), and the result is
# drawn with plotly.
abortion_age_plot <-
  merged_data %>%
  select(-total) %>%
  pivot_longer(
    cols = age_less_20:age_plus_40,
    names_to = "age",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~age,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortions in NY State by Age",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Display the chart.
abortion_age_plot